import pandas as pd #for dataframe operations
import seaborn as sns #for plotting visuals
import matplotlib.pyplot as plt #for extra plotting options
from sklearn.model_selection import train_test_split #for splitting dataset
from sklearn.linear_model import LinearRegression #for linear regression model
#show every row and every column when a frame is displayed (no truncation)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
#load the RTU dataset from its CSV export and preview the first rows
df = pd.read_csv('11752740/RTU.csv')
df.head()
| Timestamp | RTU: Supply Air Temperature | RTU: Return Air Temperature | RTU: Supply Air Fan Status | RTU: Circuit 1 Discharge Temperature | RTU: Circuit 1 Condenser Outlet Temperature | RTU: Circuit 1 Suction Temperature | RTU: Circuit 2 Discharge Temperature | RTU: Circuit 2 Condenser Outlet Temperature | RTU: Circuit 2 Suction Temperature | RTU: Circuit 1 Discharge Pressure | RTU: Circuit 1 Condenser Outlet Pressure | RTU: Circuit 1 Suction Pressure | RTU: Circuit 2 Discharge Pressure | RTU: Circuit 2 Condenser Outlet Pressure | RTU: Circuit 2 Suction Pressure | RTU: Supply Air Volumetric Flow Rate | RTU: Electricity | RTU: Natural Gas | Occupancy Mode Indicator | Terminal: Room 102 Air Humidity | Terminal: Room 103 Air Humidity | Terminal: Room 104 Air Humidity | Terminal: Room 105 Air Humidity | Terminal: Room 106 Air Humidity | Terminal: Room 202 Air Humidity | Terminal: Room 203 Air Humidity | Terminal: Room 204 Air Humidity | Terminal: Room 205 Air Humidity | Terminal: Room 206 Air Humidity | Terminal: Room 102 Air Temperature | Terminal: Room 103 Air Temperature | Terminal: Room 104 Air Temperature | Terminal: Room 105 Air Temperature | Terminal: Room 106 Air Temperature | Terminal: Room 202 Air Temperature | Terminal: Room 203 Air Temperature | Terminal: Room 204 Air Temperature | Terminal: Room 205 Air Temperature | Terminal: Room 206 Air Temperature | Terminal: Room Air Temperature Heating Setpoint | Terminal: Room Air Temperature Cooling Setpoint | HVAC System: Electricity | Lighting System: Electricity | Lighting System: Control Command | RTU: Compressor 1 On/Off Status | RTU: Compressor 2 On/Off Status | RTU: Fan Electricity | VAV Box: Room 102 Reheat Status | VAV Box: Room 103 Reheat Status | VAV Box: Room 104 Reheat Status | VAV Box: Room 105 Reheat Status | VAV Box: Room 106 Reheat Status | VAV Box: Room 202 Reheat Status | VAV Box: Room 203 Reheat Status | VAV Box: Room 204 Reheat Status | VAV Box: Room 205 Reheat Status | VAV Box: Room 206 
Reheat Status | VAV Box: Room 102 Air Temperature | VAV Box: Room 103 Air Temperature | VAV Box: Room 104 Air Temperature | VAV Box: Room 105 Air Temperature | VAV Box: Room 106 Air Temperature | VAV Box: Room 202 Air Temperature | VAV Box: Room 203 Air Temperature | VAV Box: Room 204 Air Temperature | VAV Box: Room 205 Air Temperature | VAV Box: Room 206 Air Temperature | Fault Detection Ground Truth | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 8/27/2017 0:00 | NaN | NaN | 0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2.07747 | 0.0 | 0 | 50.06 | 47.64 | 49.42 | 47.12 | 49.55 | 48.64 | 47.86 | 45.73 | 44.14 | 44.67 | 72.32 | 74.03 | 72.02 | 72.83 | 72.92 | 73.14 | 72.67 | 72.84 | 74.62 | 74.87 | 60.0 | 80.0 | 3.50865 | 10.57066 | 0 | 0 | 0 | 0.32319 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1 |
| 1 | 8/27/2017 0:01 | NaN | NaN | 0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2.30830 | 0.0 | 0 | 49.97 | 47.64 | 49.47 | 47.17 | 49.61 | 48.66 | 48.05 | 45.76 | 44.04 | 44.68 | 72.29 | 73.97 | 72.01 | 72.85 | 72.95 | 73.17 | 72.66 | 72.85 | 74.58 | 74.93 | 60.0 | 80.0 | 3.70485 | 10.57066 | 0 | 0 | 0 | 0.32319 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1 |
| 2 | 8/27/2017 0:02 | NaN | NaN | 0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2.07747 | 0.0 | 0 | 50.06 | 47.69 | 49.43 | 47.20 | 49.65 | 48.71 | 47.95 | 45.76 | 44.12 | 44.72 | 72.34 | 74.05 | 71.99 | 72.90 | 72.97 | 73.21 | 72.72 | 72.88 | 74.66 | 74.92 | 60.0 | 80.0 | 3.54327 | 10.55142 | 0 | 0 | 0 | 0.32319 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1 |
| 3 | 8/27/2017 0:03 | NaN | NaN | 0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2.07747 | 0.0 | 0 | 50.10 | 47.67 | 49.57 | 47.21 | 49.65 | 48.68 | 47.90 | 45.79 | 44.02 | 44.88 | 72.31 | 74.08 | 72.03 | 72.85 | 72.99 | 73.20 | 72.72 | 72.87 | 74.64 | 74.99 | 60.0 | 80.0 | 3.42785 | 10.57451 | 0 | 0 | 0 | 0.27702 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1 |
| 4 | 8/27/2017 0:04 | NaN | NaN | 0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2.07747 | 0.0 | 0 | 50.02 | 47.69 | 49.50 | 47.24 | 49.59 | 48.71 | 47.98 | 45.79 | 44.01 | 44.82 | 72.29 | 74.04 | 72.00 | 72.90 | 72.94 | 73.18 | 72.70 | 72.85 | 74.51 | 74.97 | 60.0 | 80.0 | 3.47402 | 10.57066 | 0 | 0 | 0 | 0.32319 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1 |
#column dtypes and non-null counts for the raw dataset
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 30240 entries, 0 to 30239 Data columns (total 69 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Timestamp 30240 non-null object 1 RTU: Supply Air Temperature 20156 non-null float64 2 RTU: Return Air Temperature 20156 non-null float64 3 RTU: Supply Air Fan Status 30240 non-null int64 4 RTU: Circuit 1 Discharge Temperature 15028 non-null float64 5 RTU: Circuit 1 Condenser Outlet Temperature 15028 non-null float64 6 RTU: Circuit 1 Suction Temperature 15028 non-null float64 7 RTU: Circuit 2 Discharge Temperature 15028 non-null float64 8 RTU: Circuit 2 Condenser Outlet Temperature 15028 non-null float64 9 RTU: Circuit 2 Suction Temperature 15028 non-null float64 10 RTU: Circuit 1 Discharge Pressure 15028 non-null float64 11 RTU: Circuit 1 Condenser Outlet Pressure 2874 non-null float64 12 RTU: Circuit 1 Suction Pressure 15028 non-null float64 13 RTU: Circuit 2 Discharge Pressure 15028 non-null float64 14 RTU: Circuit 2 Condenser Outlet Pressure 15028 non-null float64 15 RTU: Circuit 2 Suction Pressure 15028 non-null float64 16 RTU: Supply Air Volumetric Flow Rate 20129 non-null float64 17 RTU: Electricity 30240 non-null float64 18 RTU: Natural Gas 30240 non-null float64 19 Occupancy Mode Indicator 30240 non-null int64 20 Terminal: Room 102 Air Humidity 30240 non-null float64 21 Terminal: Room 103 Air Humidity 30240 non-null float64 22 Terminal: Room 104 Air Humidity 30240 non-null float64 23 Terminal: Room 105 Air Humidity 30240 non-null float64 24 Terminal: Room 106 Air Humidity 30240 non-null float64 25 Terminal: Room 202 Air Humidity 30240 non-null float64 26 Terminal: Room 203 Air Humidity 30240 non-null float64 27 Terminal: Room 204 Air Humidity 30240 non-null float64 28 Terminal: Room 205 Air Humidity 30240 non-null float64 29 Terminal: Room 206 Air Humidity 30240 non-null float64 30 Terminal: Room 102 Air Temperature 30240 non-null float64 31 Terminal: Room 103 Air Temperature 30240 
non-null float64 32 Terminal: Room 104 Air Temperature 30240 non-null float64 33 Terminal: Room 105 Air Temperature 30240 non-null float64 34 Terminal: Room 106 Air Temperature 30240 non-null float64 35 Terminal: Room 202 Air Temperature 30240 non-null float64 36 Terminal: Room 203 Air Temperature 30240 non-null float64 37 Terminal: Room 204 Air Temperature 30240 non-null float64 38 Terminal: Room 205 Air Temperature 30240 non-null float64 39 Terminal: Room 206 Air Temperature 30240 non-null float64 40 Terminal: Room Air Temperature Heating Setpoint 30240 non-null float64 41 Terminal: Room Air Temperature Cooling Setpoint 30240 non-null float64 42 HVAC System: Electricity 30240 non-null float64 43 Lighting System: Electricity 30240 non-null float64 44 Lighting System: Control Command 30240 non-null int64 45 RTU: Compressor 1 On/Off Status 30240 non-null int64 46 RTU: Compressor 2 On/Off Status 30240 non-null int64 47 RTU: Fan Electricity 30240 non-null float64 48 VAV Box: Room 102 Reheat Status 30240 non-null int64 49 VAV Box: Room 103 Reheat Status 30240 non-null int64 50 VAV Box: Room 104 Reheat Status 30240 non-null int64 51 VAV Box: Room 105 Reheat Status 30240 non-null int64 52 VAV Box: Room 106 Reheat Status 30240 non-null int64 53 VAV Box: Room 202 Reheat Status 30240 non-null int64 54 VAV Box: Room 203 Reheat Status 30240 non-null int64 55 VAV Box: Room 204 Reheat Status 30240 non-null int64 56 VAV Box: Room 205 Reheat Status 30240 non-null int64 57 VAV Box: Room 206 Reheat Status 30240 non-null int64 58 VAV Box: Room 102 Air Temperature 20156 non-null float64 59 VAV Box: Room 103 Air Temperature 20156 non-null float64 60 VAV Box: Room 104 Air Temperature 20156 non-null float64 61 VAV Box: Room 105 Air Temperature 20156 non-null float64 62 VAV Box: Room 106 Air Temperature 20156 non-null float64 63 VAV Box: Room 202 Air Temperature 20156 non-null float64 64 VAV Box: Room 203 Air Temperature 20156 non-null float64 65 VAV Box: Room 204 Air Temperature 20156 
non-null float64 66 VAV Box: Room 205 Air Temperature 20156 non-null float64 67 VAV Box: Room 206 Air Temperature 20156 non-null float64 68 Fault Detection Ground Truth 30240 non-null int64 dtypes: float64(52), int64(16), object(1) memory usage: 15.9+ MB
#count missing values per column, then list the worst-affected columns first
null_counts = df.isnull().sum()
null_counts.sort_values(ascending=False)
RTU: Circuit 1 Condenser Outlet Pressure 27366 RTU: Circuit 2 Suction Temperature 15212 RTU: Circuit 1 Suction Pressure 15212 RTU: Circuit 2 Suction Pressure 15212 RTU: Circuit 2 Condenser Outlet Pressure 15212 RTU: Circuit 1 Discharge Temperature 15212 RTU: Circuit 1 Condenser Outlet Temperature 15212 RTU: Circuit 1 Suction Temperature 15212 RTU: Circuit 2 Discharge Temperature 15212 RTU: Circuit 2 Condenser Outlet Temperature 15212 RTU: Circuit 2 Discharge Pressure 15212 RTU: Circuit 1 Discharge Pressure 15212 RTU: Supply Air Volumetric Flow Rate 10111 VAV Box: Room 203 Air Temperature 10084 VAV Box: Room 103 Air Temperature 10084 VAV Box: Room 102 Air Temperature 10084 VAV Box: Room 104 Air Temperature 10084 VAV Box: Room 105 Air Temperature 10084 VAV Box: Room 202 Air Temperature 10084 VAV Box: Room 106 Air Temperature 10084 VAV Box: Room 204 Air Temperature 10084 VAV Box: Room 205 Air Temperature 10084 VAV Box: Room 206 Air Temperature 10084 RTU: Return Air Temperature 10084 RTU: Supply Air Temperature 10084 Lighting System: Control Command 0 VAV Box: Room 104 Reheat Status 0 RTU: Compressor 1 On/Off Status 0 RTU: Compressor 2 On/Off Status 0 RTU: Fan Electricity 0 VAV Box: Room 102 Reheat Status 0 VAV Box: Room 103 Reheat Status 0 Timestamp 0 VAV Box: Room 105 Reheat Status 0 VAV Box: Room 106 Reheat Status 0 VAV Box: Room 202 Reheat Status 0 VAV Box: Room 203 Reheat Status 0 VAV Box: Room 204 Reheat Status 0 VAV Box: Room 205 Reheat Status 0 VAV Box: Room 206 Reheat Status 0 HVAC System: Electricity 0 Lighting System: Electricity 0 Terminal: Room 106 Air Temperature 0 Terminal: Room Air Temperature Cooling Setpoint 0 Terminal: Room Air Temperature Heating Setpoint 0 RTU: Supply Air Fan Status 0 RTU: Electricity 0 RTU: Natural Gas 0 Occupancy Mode Indicator 0 Terminal: Room 102 Air Humidity 0 Terminal: Room 103 Air Humidity 0 Terminal: Room 104 Air Humidity 0 Terminal: Room 105 Air Humidity 0 Terminal: Room 106 Air Humidity 0 Terminal: Room 202 Air Humidity 0 
Terminal: Room 203 Air Humidity 0 Terminal: Room 204 Air Humidity 0 Terminal: Room 205 Air Humidity 0 Terminal: Room 206 Air Humidity 0 Terminal: Room 102 Air Temperature 0 Terminal: Room 103 Air Temperature 0 Terminal: Room 104 Air Temperature 0 Terminal: Room 105 Air Temperature 0 Terminal: Room 202 Air Temperature 0 Terminal: Room 203 Air Temperature 0 Terminal: Room 204 Air Temperature 0 Terminal: Room 205 Air Temperature 0 Terminal: Room 206 Air Temperature 0 Fault Detection Ground Truth 0 dtype: int64
#columns removed before analysis:
# - 'RTU: Circuit 1 Condenser Outlet Pressure' is over 90% missing
# - the other three were judged uninformative for this analysis
unused_columns = [
    'RTU: Circuit 1 Condenser Outlet Pressure',
    'RTU: Supply Air Fan Status',
    'RTU: Natural Gas',
    'Fault Detection Ground Truth',
]
df.drop(columns=unused_columns, inplace=True)
#heatmap of the null mask shows where the remaining missing values sit
null_mask = df.isnull()
sns.heatmap(null_mask, cbar=False, cmap='viridis')
<AxesSubplot:>
#keep only complete, unique rows
df = df.dropna().drop_duplicates()
#roughly half of the original rows remain for analysis
df.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 14982 entries, 403 to 30116 Data columns (total 65 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Timestamp 14982 non-null object 1 RTU: Supply Air Temperature 14982 non-null float64 2 RTU: Return Air Temperature 14982 non-null float64 3 RTU: Circuit 1 Discharge Temperature 14982 non-null float64 4 RTU: Circuit 1 Condenser Outlet Temperature 14982 non-null float64 5 RTU: Circuit 1 Suction Temperature 14982 non-null float64 6 RTU: Circuit 2 Discharge Temperature 14982 non-null float64 7 RTU: Circuit 2 Condenser Outlet Temperature 14982 non-null float64 8 RTU: Circuit 2 Suction Temperature 14982 non-null float64 9 RTU: Circuit 1 Discharge Pressure 14982 non-null float64 10 RTU: Circuit 1 Suction Pressure 14982 non-null float64 11 RTU: Circuit 2 Discharge Pressure 14982 non-null float64 12 RTU: Circuit 2 Condenser Outlet Pressure 14982 non-null float64 13 RTU: Circuit 2 Suction Pressure 14982 non-null float64 14 RTU: Supply Air Volumetric Flow Rate 14982 non-null float64 15 RTU: Electricity 14982 non-null float64 16 Occupancy Mode Indicator 14982 non-null int64 17 Terminal: Room 102 Air Humidity 14982 non-null float64 18 Terminal: Room 103 Air Humidity 14982 non-null float64 19 Terminal: Room 104 Air Humidity 14982 non-null float64 20 Terminal: Room 105 Air Humidity 14982 non-null float64 21 Terminal: Room 106 Air Humidity 14982 non-null float64 22 Terminal: Room 202 Air Humidity 14982 non-null float64 23 Terminal: Room 203 Air Humidity 14982 non-null float64 24 Terminal: Room 204 Air Humidity 14982 non-null float64 25 Terminal: Room 205 Air Humidity 14982 non-null float64 26 Terminal: Room 206 Air Humidity 14982 non-null float64 27 Terminal: Room 102 Air Temperature 14982 non-null float64 28 Terminal: Room 103 Air Temperature 14982 non-null float64 29 Terminal: Room 104 Air Temperature 14982 non-null float64 30 Terminal: Room 105 Air Temperature 14982 non-null float64 31 Terminal: Room 106 
Air Temperature 14982 non-null float64 32 Terminal: Room 202 Air Temperature 14982 non-null float64 33 Terminal: Room 203 Air Temperature 14982 non-null float64 34 Terminal: Room 204 Air Temperature 14982 non-null float64 35 Terminal: Room 205 Air Temperature 14982 non-null float64 36 Terminal: Room 206 Air Temperature 14982 non-null float64 37 Terminal: Room Air Temperature Heating Setpoint 14982 non-null float64 38 Terminal: Room Air Temperature Cooling Setpoint 14982 non-null float64 39 HVAC System: Electricity 14982 non-null float64 40 Lighting System: Electricity 14982 non-null float64 41 Lighting System: Control Command 14982 non-null int64 42 RTU: Compressor 1 On/Off Status 14982 non-null int64 43 RTU: Compressor 2 On/Off Status 14982 non-null int64 44 RTU: Fan Electricity 14982 non-null float64 45 VAV Box: Room 102 Reheat Status 14982 non-null int64 46 VAV Box: Room 103 Reheat Status 14982 non-null int64 47 VAV Box: Room 104 Reheat Status 14982 non-null int64 48 VAV Box: Room 105 Reheat Status 14982 non-null int64 49 VAV Box: Room 106 Reheat Status 14982 non-null int64 50 VAV Box: Room 202 Reheat Status 14982 non-null int64 51 VAV Box: Room 203 Reheat Status 14982 non-null int64 52 VAV Box: Room 204 Reheat Status 14982 non-null int64 53 VAV Box: Room 205 Reheat Status 14982 non-null int64 54 VAV Box: Room 206 Reheat Status 14982 non-null int64 55 VAV Box: Room 102 Air Temperature 14982 non-null float64 56 VAV Box: Room 103 Air Temperature 14982 non-null float64 57 VAV Box: Room 104 Air Temperature 14982 non-null float64 58 VAV Box: Room 105 Air Temperature 14982 non-null float64 59 VAV Box: Room 106 Air Temperature 14982 non-null float64 60 VAV Box: Room 202 Air Temperature 14982 non-null float64 61 VAV Box: Room 203 Air Temperature 14982 non-null float64 62 VAV Box: Room 204 Air Temperature 14982 non-null float64 63 VAV Box: Room 205 Air Temperature 14982 non-null float64 64 VAV Box: Room 206 Air Temperature 14982 non-null float64 dtypes: float64(50), 
int64(14), object(1) memory usage: 7.5+ MB
#summary statistics (count, mean, std, min, quartiles, max) of the cleaned dataset
df.describe()
| RTU: Supply Air Temperature | RTU: Return Air Temperature | RTU: Circuit 1 Discharge Temperature | RTU: Circuit 1 Condenser Outlet Temperature | RTU: Circuit 1 Suction Temperature | RTU: Circuit 2 Discharge Temperature | RTU: Circuit 2 Condenser Outlet Temperature | RTU: Circuit 2 Suction Temperature | RTU: Circuit 1 Discharge Pressure | RTU: Circuit 1 Suction Pressure | RTU: Circuit 2 Discharge Pressure | RTU: Circuit 2 Condenser Outlet Pressure | RTU: Circuit 2 Suction Pressure | RTU: Supply Air Volumetric Flow Rate | RTU: Electricity | Occupancy Mode Indicator | Terminal: Room 102 Air Humidity | Terminal: Room 103 Air Humidity | Terminal: Room 104 Air Humidity | Terminal: Room 105 Air Humidity | Terminal: Room 106 Air Humidity | Terminal: Room 202 Air Humidity | Terminal: Room 203 Air Humidity | Terminal: Room 204 Air Humidity | Terminal: Room 205 Air Humidity | Terminal: Room 206 Air Humidity | Terminal: Room 102 Air Temperature | Terminal: Room 103 Air Temperature | Terminal: Room 104 Air Temperature | Terminal: Room 105 Air Temperature | Terminal: Room 106 Air Temperature | Terminal: Room 202 Air Temperature | Terminal: Room 203 Air Temperature | Terminal: Room 204 Air Temperature | Terminal: Room 205 Air Temperature | Terminal: Room 206 Air Temperature | Terminal: Room Air Temperature Heating Setpoint | Terminal: Room Air Temperature Cooling Setpoint | HVAC System: Electricity | Lighting System: Electricity | Lighting System: Control Command | RTU: Compressor 1 On/Off Status | RTU: Compressor 2 On/Off Status | RTU: Fan Electricity | VAV Box: Room 102 Reheat Status | VAV Box: Room 103 Reheat Status | VAV Box: Room 104 Reheat Status | VAV Box: Room 105 Reheat Status | VAV Box: Room 106 Reheat Status | VAV Box: Room 202 Reheat Status | VAV Box: Room 203 Reheat Status | VAV Box: Room 204 Reheat Status | VAV Box: Room 205 Reheat Status | VAV Box: Room 206 Reheat Status | VAV Box: Room 102 Air Temperature | VAV Box: Room 103 Air Temperature | VAV Box: Room 104 
Air Temperature | VAV Box: Room 105 Air Temperature | VAV Box: Room 106 Air Temperature | VAV Box: Room 202 Air Temperature | VAV Box: Room 203 Air Temperature | VAV Box: Room 204 Air Temperature | VAV Box: Room 205 Air Temperature | VAV Box: Room 206 Air Temperature | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.00000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 | 14982.000000 |
| mean | 54.659773 | 69.226051 | 112.576803 | 72.710517 | 48.850533 | 74.465199 | 62.528706 | 64.469865 | 299.567855 | 142.889824 | 183.821799 | 182.509939 | 168.869240 | 3362.478741 | 104.446442 | 0.956147 | 38.710457 | 36.176014 | 36.733283 | 34.259887 | 35.138231 | 40.460642 | 40.386508 | 35.981113 | 34.856934 | 36.820376 | 70.077453 | 71.454277 | 70.496341 | 71.665397 | 71.381552 | 69.507111 | 68.453243 | 71.405185 | 72.353922 | 71.868989 | 69.370244 | 75.410493 | 293.069275 | 33.794142 | 0.900347 | 0.98912 | 0.061541 | 29.357578 | 0.798225 | 0.087972 | 0.899279 | 0.641236 | 0.678147 | 0.913830 | 0.802163 | 0.979309 | 0.807102 | 0.750901 | 69.543592 | 56.980803 | 69.882689 | 66.619696 | 67.485989 | 60.841741 | 63.319308 | 70.984345 | 68.513801 | 65.655842 |
| std | 4.582580 | 1.647581 | 16.185610 | 14.720345 | 7.133052 | 24.788601 | 15.735633 | 7.219095 | 60.883516 | 16.012927 | 67.680692 | 62.966949 | 14.602717 | 490.234669 | 42.307766 | 0.204774 | 8.495727 | 9.050778 | 8.361382 | 8.334257 | 8.792250 | 8.850866 | 8.429761 | 8.500311 | 8.449510 | 8.220738 | 1.558748 | 1.276909 | 1.516098 | 1.798610 | 1.813452 | 0.793763 | 1.003755 | 1.557012 | 2.317246 | 1.860276 | 2.006785 | 0.982915 | 108.796192 | 9.927757 | 0.299547 | 0.10374 | 0.240327 | 3.366095 | 0.401339 | 0.283264 | 0.300969 | 0.479654 | 0.467203 | 0.280624 | 0.398382 | 0.142354 | 0.394587 | 0.432506 | 10.997601 | 2.653253 | 7.641931 | 9.503227 | 11.354419 | 3.534003 | 6.778761 | 8.136910 | 10.719865 | 8.508881 |
| min | 31.760000 | 62.230000 | 27.050000 | 21.620000 | -1.155000 | 28.550000 | 26.280000 | 0.012000 | 117.400000 | 38.200000 | 87.800000 | 88.100000 | 66.470000 | 591.800000 | 17.543080 | 0.000000 | 21.460000 | 15.690000 | 19.740000 | 15.570000 | 16.060000 | 19.280000 | 18.440000 | 19.170000 | 16.880000 | 17.290000 | 60.000000 | 67.750000 | 61.640000 | 63.030000 | 60.050000 | 66.710000 | 61.650000 | 62.160000 | 63.270000 | 63.850000 | 60.000000 | 75.200000 | 44.225910 | 9.066280 | 0.000000 | 0.00000 | 0.000000 | 9.234000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 50.480000 | 50.420000 | 51.420000 | 49.970000 | 51.230000 | 50.090000 | 47.250000 | 48.100000 | 46.800000 | 50.100000 |
| 25% | 51.180000 | 68.360000 | 102.900000 | 64.130000 | 42.650000 | 62.830000 | 51.480000 | 61.750000 | 264.600000 | 133.100000 | 159.100000 | 159.000000 | 159.100000 | 3155.000000 | 91.408680 | 1.000000 | 31.890000 | 28.590000 | 29.802500 | 27.650000 | 27.750000 | 32.790000 | 33.440000 | 28.980000 | 28.002500 | 30.150000 | 69.340000 | 70.530000 | 69.820000 | 70.970000 | 70.810000 | 68.840000 | 67.900000 | 70.870000 | 71.270000 | 70.950000 | 69.800000 | 75.200000 | 199.332272 | 26.849080 | 1.000000 | 1.00000 | 0.000000 | 27.840510 | 1.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 59.340000 | 54.860000 | 65.510000 | 57.370000 | 57.890000 | 58.222500 | 57.700000 | 65.060000 | 59.310000 | 58.630000 |
| 50% | 53.485000 | 68.760000 | 109.800000 | 71.670000 | 48.480000 | 65.950000 | 57.580000 | 64.820000 | 293.800000 | 138.600000 | 166.600000 | 166.300000 | 166.200000 | 3170.000000 | 102.257700 | 1.000000 | 36.790000 | 34.450000 | 34.970000 | 32.320000 | 34.060000 | 39.080000 | 38.760000 | 34.000000 | 33.395000 | 35.600000 | 70.560000 | 71.690000 | 70.730000 | 71.540000 | 71.250000 | 69.610000 | 68.370000 | 71.700000 | 72.040000 | 71.430000 | 69.800000 | 75.200000 | 290.064155 | 40.272990 | 1.000000 | 1.00000 | 0.000000 | 28.117530 | 1.000000 | 0.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 67.960000 | 56.780000 | 70.910000 | 67.750000 | 66.790000 | 60.820000 | 63.070000 | 70.680000 | 67.600000 | 65.120000 |
| 75% | 57.030000 | 69.940000 | 116.700000 | 78.690000 | 54.380000 | 71.820000 | 69.410000 | 67.430000 | 319.000000 | 148.600000 | 180.500000 | 180.700000 | 180.500000 | 3190.000000 | 108.720900 | 1.000000 | 45.980000 | 43.180000 | 43.717500 | 41.180000 | 43.000000 | 48.440000 | 48.507500 | 43.200000 | 42.120000 | 44.280000 | 71.090000 | 72.360000 | 71.360000 | 72.210000 | 72.120000 | 70.020000 | 68.830000 | 72.240000 | 73.740000 | 72.900000 | 69.800000 | 75.200000 | 358.824111 | 40.373047 | 1.000000 | 1.00000 | 0.000000 | 28.533060 | 1.000000 | 0.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 80.200000 | 58.700000 | 74.810000 | 73.250000 | 72.410000 | 63.330000 | 67.887500 | 75.880000 | 76.000000 | 70.697500 |
| max | 81.000000 | 76.240000 | 172.700000 | 140.800000 | 72.680000 | 186.400000 | 127.700000 | 104.800000 | 645.500000 | 229.000000 | 640.500000 | 617.000000 | 236.700000 | 4450.000000 | 316.929600 | 1.000000 | 57.920000 | 56.570000 | 56.700000 | 55.290000 | 56.080000 | 60.020000 | 62.600000 | 60.520000 | 58.580000 | 58.080000 | 80.300000 | 80.500000 | 77.040000 | 78.430000 | 76.990000 | 79.590000 | 80.700000 | 77.430000 | 82.800000 | 77.930000 | 69.800000 | 80.000000 | 618.475090 | 42.416420 | 1.000000 | 1.00000 | 1.000000 | 36.843660 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 89.800000 | 73.160000 | 93.200000 | 94.800000 | 107.500000 | 77.030000 | 94.300000 | 96.900000 | 99.200000 | 90.800000 |
#heatmap of the summary-statistics table (rows: statistics, columns: features)
summary_stats = df.describe()
sns.heatmap(summary_stats, cmap='viridis', linewidths=.1)
<AxesSubplot:>
#correlation of every numeric feature with the target, sorted ascending
#the 'Timestamp' column is still a string here; corr() raises on non-numeric
#columns in pandas 2.x, so restrict the frame to numeric dtypes first
numeric_df = df.select_dtypes(include='number')
numeric_df.corr()['HVAC System: Electricity'].sort_values()
Lighting System: Electricity -0.488278 Terminal: Room 202 Air Temperature -0.450243 RTU: Circuit 1 Condenser Outlet Temperature -0.420466 RTU: Circuit 2 Condenser Outlet Temperature -0.385797 RTU: Return Air Temperature -0.375820 RTU: Circuit 1 Discharge Temperature -0.370428 Terminal: Room 203 Air Temperature -0.354623 Terminal: Room 102 Air Temperature -0.338295 RTU: Circuit 1 Discharge Pressure -0.324770 RTU: Circuit 2 Suction Pressure -0.321290 RTU: Circuit 2 Discharge Temperature -0.296799 Terminal: Room 103 Air Temperature -0.293220 Lighting System: Control Command -0.283875 Terminal: Room 106 Air Humidity -0.276153 RTU: Circuit 2 Suction Temperature -0.256445 RTU: Supply Air Volumetric Flow Rate -0.255633 Terminal: Room 204 Air Humidity -0.254785 RTU: Fan Electricity -0.252593 Terminal: Room 104 Air Temperature -0.249287 Terminal: Room 206 Air Temperature -0.247486 Terminal: Room 203 Air Humidity -0.237773 Terminal: Room 105 Air Humidity -0.237564 Terminal: Room 205 Air Temperature -0.235353 Terminal: Room 202 Air Humidity -0.232758 Terminal: Room 104 Air Humidity -0.227637 Terminal: Room 103 Air Humidity -0.223923 Terminal: Room 206 Air Humidity -0.217394 RTU: Circuit 1 Suction Pressure -0.204644 Terminal: Room 205 Air Humidity -0.195777 Terminal: Room 102 Air Humidity -0.176710 Terminal: Room 106 Air Temperature -0.155113 RTU: Circuit 1 Suction Temperature -0.137741 RTU: Electricity -0.122145 RTU: Supply Air Temperature -0.120372 Terminal: Room Air Temperature Heating Setpoint -0.113868 Occupancy Mode Indicator -0.113868 RTU: Circuit 2 Condenser Outlet Pressure -0.098959 RTU: Circuit 2 Discharge Pressure -0.094612 Terminal: Room 105 Air Temperature -0.089386 RTU: Compressor 2 On/Off Status -0.071361 Terminal: Room 204 Air Temperature 0.011601 RTU: Compressor 1 On/Off Status 0.039298 Terminal: Room Air Temperature Cooling Setpoint 0.113868 VAV Box: Room 103 Air Temperature 0.126953 VAV Box: Room 103 Reheat Status 0.159653 VAV Box: Room 204 Reheat Status 
0.186065 VAV Box: Room 104 Reheat Status 0.274552 VAV Box: Room 202 Reheat Status 0.340293 VAV Box: Room 102 Reheat Status 0.424868 VAV Box: Room 203 Reheat Status 0.447526 VAV Box: Room 202 Air Temperature 0.459157 VAV Box: Room 205 Reheat Status 0.497384 VAV Box: Room 206 Reheat Status 0.571784 VAV Box: Room 203 Air Temperature 0.609623 VAV Box: Room 106 Reheat Status 0.648610 VAV Box: Room 102 Air Temperature 0.680709 VAV Box: Room 105 Reheat Status 0.697665 VAV Box: Room 104 Air Temperature 0.716804 VAV Box: Room 204 Air Temperature 0.740619 VAV Box: Room 106 Air Temperature 0.780435 VAV Box: Room 205 Air Temperature 0.810556 VAV Box: Room 105 Air Temperature 0.853008 VAV Box: Room 206 Air Temperature 0.857091 HVAC System: Electricity 1.000000 Name: HVAC System: Electricity, dtype: float64
#full correlation matrix on a large canvas so the per-cell annotations stay readable
fig, ax = plt.subplots(figsize=(50,50))
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
#restrict to numeric columns: corr() raises on the string 'Timestamp' column in pandas 2.x
corr_matrix = df.select_dtypes(include='number').corr()
#draw on the axes created above rather than relying on the implicit current axes
sns.heatmap(corr_matrix, annot=True, linewidths=.1, annot_kws={"size": 12}, ax=ax)
<AxesSubplot:>
#target distribution
#histplot replaces the deprecated distplot; kde=True and stat='density' keep
#the density-scaled histogram with a KDE curve overlaid
sns.histplot(df['HVAC System: Electricity'], kde=True, stat='density')
C:\Users\AYUSH\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
<AxesSubplot:xlabel='HVAC System: Electricity', ylabel='Density'>
#set the raw timestamps aside; they are not used as a model feature
timestamp = df['Timestamp']
df.drop(columns='Timestamp', inplace=True)
#target is total HVAC electricity; every other remaining column is a feature
target_col = 'HVAC System: Electricity'
y = df[target_col]
X = df.drop(columns=target_col)
#hold out 33% of the rows for testing (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)
#fit a linear regression model on the training rows
regr = LinearRegression()
regr.fit(X_train, y_train)
#coefficient of determination (R^2) on the held-out rows
#NOTE(review): rows are shuffled before splitting although the data is a time series - confirm this is intended
test_r2 = regr.score(X_test, y_test)
print(f'Coefficient of determination of prediction is {test_r2}')
Coefficient of determination of prediction is 0.992909419976274
#predictions for the held-out test rows
y_pred = regr.predict(X_test)
#predicted vs. real values; points close to a straight line mean the predictions track the real values
plt.scatter(y_test, y_pred)
plt.xlabel('Real values')
plt.ylabel('Predicted values')
Text(0, 0.5, 'Predicted values')
#put the timestamps back on the frame, parsed as datetimes
df['Timestamp'] = pd.to_datetime(timestamp)
#separate frame holding only RTU electricity, indexed by timestamp
dff = df[['Timestamp', 'RTU: Electricity']].set_index('Timestamp')
#ten most recent readings
dff.sort_values(by='Timestamp', ascending=False).head(n=10)
| RTU: Electricity | |
|---|---|
| Timestamp | |
| 2018-02-18 21:56:00 | 87.02291 |
| 2018-02-18 21:55:00 | 107.56680 |
| 2018-02-18 21:54:00 | 107.56680 |
| 2018-02-18 21:53:00 | 107.33590 |
| 2018-02-18 21:52:00 | 107.56680 |
| 2018-02-18 21:51:00 | 106.18180 |
| 2018-02-18 21:50:00 | 99.25690 |
| 2018-02-18 21:46:00 | 68.55651 |
| 2018-02-18 21:45:00 | 106.64350 |
| 2018-02-18 21:44:00 | 106.87430 |
#extract the RTU electricity column of dff as a Series (the time series to plot)
ts = dff['RTU: Electricity']
ts.head()
Timestamp 2017-08-27 06:43:00 108.9518 2017-08-27 06:44:00 130.4189 2017-08-27 06:45:00 140.5755 2017-08-27 06:46:00 246.7573 2017-08-27 06:47:00 303.7723 Name: RTU: Electricity, dtype: float64
#line plot of RTU electricity over time
#original note: values peak around winter (NOTE(review): confirm against the rendered plot)
plt.plot(ts)
[<matplotlib.lines.Line2D at 0x26fcab28d30>]